{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "def range_matrix(r,c):\n",
    "    return np.arange(r*c).reshape((r, c))*0.1+0.1\n",
    "\n",
    "\n",
    "# --- problem dimensions and optimizer settings ---\n",
    "input_len, num_classes = 3, 3  # width of each input window / number of output classes\n",
    "n = p = 0  # iteration counter and data pointer used by the training loop\n",
    "hidden_size = 2  # number of units in the hidden layer\n",
    "seq_length = 3  # number of time steps the RNN is unrolled for\n",
    "learning_rate = 1\n",
    "\n",
    "# --- synthetic data: the integers 1..data_len ---\n",
    "data_len = 50000\n",
    "x = np.arange(1, data_len + 1)\n",
    "\n",
    "# Label i is (x[i-1] + x[i-2]) % 3; for i < 2 the lookups wrap around to the end of x.\n",
    "ground_truth = [(prev1 + prev2) % 3 for prev1, prev2 in zip(np.roll(x, 1), np.roll(x, 2))]\n",
    "\n",
    "# --- model parameters (deterministic initial values from range_matrix) ---\n",
    "U = range_matrix(hidden_size, input_len)  # input -> hidden weights\n",
    "W = range_matrix(hidden_size, hidden_size)  # hidden -> hidden weights\n",
    "V = range_matrix(num_classes, hidden_size)  # hidden -> output weights\n",
    "# Zero-initialised column-vector biases for the hidden and output layers.\n",
    "bs, bo = (np.zeros((rows, 1)) for rows in (hidden_size, num_classes))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [],
   "source": [
    "def forward_and_backprop(inputs, targets, hprev):\n",
    "    \"\"\"Run one unrolled forward pass and backpropagate through time.\n",
    "\n",
    "    Reads the module-level parameters U, W, V, bs, bo and the settings\n",
    "    seq_length and input_len.\n",
    "\n",
    "    Args:\n",
    "        inputs: NumPy array (1-D in this notebook); step t consumes the window\n",
    "            inputs[t:t+input_len], so it must hold at least\n",
    "            seq_length + input_len - 1 values.\n",
    "        targets: sequence of class indices, one per time step.\n",
    "        hprev: hidden state carried over from the previous call; it is copied,\n",
    "            not modified.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (loss, dU, dW, dV, dbs, dbo, h_last): the cross-entropy loss\n",
    "        summed over the time steps (a length-1 array when each target is an\n",
    "        integer index), the accumulated gradients for U, W, V, bs and bo, and\n",
    "        the hidden state after the final step.\n",
    "    \"\"\"\n",
    "    xs, hs, ys, ps = {}, {}, {}, {}\n",
    "    hs[-1] = np.copy(hprev)\n",
    "    loss = 0\n",
    "\n",
    "    # Forward pass.\n",
    "    for t in range(seq_length):\n",
    "        # Sliding window of input_len values, reshaped to a column vector (rank 2).\n",
    "        # The width comes from input_len rather than a literal so that the slice\n",
    "        # and the reshape always agree.\n",
    "        xs[t] = inputs[t:t+input_len].reshape(input_len, 1)\n",
    "        hs[t] = np.tanh(U.dot(xs[t]) + W.dot(hs[t-1]) + bs)  # hidden state, tanh activation\n",
    "        ys[t] = V.dot(hs[t]) + bo  # output logits; no activation here, softmax follows\n",
    "        ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t]))  # softmax\n",
    "        loss = loss - np.log(ps[t][targets[t]])  # cross-entropy of the target class\n",
    "\n",
    "    # Backward pass (backpropagation through time).\n",
    "    dU, dW, dV = np.zeros_like(U), np.zeros_like(W), np.zeros_like(V)\n",
    "    dbs, dbo = np.zeros_like(bs), np.zeros_like(bo)\n",
    "    dhnext = np.zeros_like(hs[0])  # pre-activation delta of step t+1 (zero past the last step)\n",
    "\n",
    "    for t in reversed(range(seq_length)):\n",
    "        dy = np.copy(ps[t])\n",
    "        dy[targets[t]] -= 1  # softmax + cross-entropy delta: p - one_hot(target)\n",
    "        dV += dy.dot(hs[t].T)  # gradient for V\n",
    "        dbo += dy  # gradient for bo\n",
    "        dh = (V.T).dot(dy) + (W.T).dot(dhnext)  # backprop into the hidden state\n",
    "        dhraw = (1 - hs[t] * hs[t]) * dh  # through tanh: derivative is 1 - tanh^2\n",
    "        dbs += dhraw  # gradient for bs\n",
    "        dU += dhraw.dot(xs[t].T)  # gradient for U\n",
    "        if t > 0:\n",
    "            # The W gradient is accumulated only for t > 0, i.e. the term that\n",
    "            # flows through the carried-in state hs[-1] is left out. Kept as is:\n",
    "            # the notebook's reference output was produced with this behavior.\n",
    "            dW += dhraw.dot(hs[t-1].T)  # gradient for W\n",
    "        dhnext = dhraw\n",
    "\n",
    "    return loss, dU, dW, dV, dbs, dbo, hs[seq_length-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "U:\n",
      "[[-0.24492589 -0.23727184 -0.2296178 ]\n",
      " [ 0.39838373  0.49675484  0.59512595]]\n",
      "W:\n",
      "[[0.0992239  0.19968422]\n",
      " [0.3000113  0.40001275]]\n",
      "V:\n",
      "[[ 0.37622149  0.920997  ]\n",
      " [ 0.39517001  0.81996845]\n",
      " [ 0.1286085  -0.54096546]]\n"
     ]
    }
   ],
   "source": [
    "for n in range(5):\n",
    "    # Sweep the data left to right in chunks of seq_length. Start over (fresh\n",
    "    # hidden state, pointer back at index 2) on the first iteration or when the\n",
    "    # next chunk would run past the end of x.\n",
    "    if p + seq_length + 1 >= len(x) or n == 0:\n",
    "        p = 2\n",
    "        hprev = np.zeros((hidden_size, 1))\n",
    "    window_end = p + seq_length\n",
    "    inputs = x[p - 2:window_end]\n",
    "    targets = ground_truth[p:window_end]\n",
    "    loss, dU, dW, dV, dbs, dbo, hprev = forward_and_backprop(inputs, targets, hprev)\n",
    "\n",
    "    # Plain SGD step, applied in place so the module-level arrays are updated.\n",
    "    for param, dparam in ((U, dU), (W, dW), (V, dV), (bs, dbs), (bo, dbo)):\n",
    "        param -= learning_rate * dparam\n",
    "\n",
    "    p = window_end  # advance the data pointer\n",
    "\n",
    "# Show the trained weight matrices.\n",
    "for label, matrix in (('U:', U), ('W:', W), ('V:', V)):\n",
    "    print(label)\n",
    "    print(matrix)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "如果一切正确，你应该看到如下的结果：\n",
    "```\n",
    "U:\n",
    "[[-0.24492589 -0.23727184 -0.2296178 ]\n",
    " [ 0.39838373  0.49675484  0.59512595]]\n",
    "W:\n",
    "[[ 0.0992239   0.19968422]\n",
    " [ 0.3000113   0.40001275]]\n",
    "V:\n",
    "[[ 0.37622149  0.920997  ]\n",
    " [ 0.39517001  0.81996845]\n",
    " [ 0.1286085  -0.54096546]]\n",
    "\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
